I did this a bit flippantly before, but I want to fomalize the process by which we estimate the uncertainty on emulator predictions.

The biggest problem is at small scales, and I'm gonna look at those bins individually.

Fixes to try:

refit with SJ, but removing the yerr weighting
New, simpler kernel?
Fit hps with MaxLike and BO in addition to SloppyJoes?
Plot sat fraction, consdier reducing HOD param space size?



In [51]:

    
from pearce.emulator import SpicyBuffalo, LemonPepperWet, OriginalRecipe
from pearce.mocks import cat_dict
import numpy as np
from os import path



In [52]:

    
import matplotlib
#matplotlib.use('Agg')
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()



In [53]:

    
#xi gg
training_file = '/scratch/users/swmclau2/xi_zheng07_cosmo_lowmsat/PearceRedMagicXiCosmoFixedNd.hdf5'
#test_file= '/scratch/users/swmclau2/xi_zheng07_cosmo_test_lowmsat2/'
test_file =  '/scratch/users/swmclau2/xi_zheng07_cosmo_test_lowmsat2/PearceRedMagicXiCosmoFixedNd_Test.hdf5'

#xi gm training_file = '/scratch/users/swmclau2/xi_gm_cosmo/PearceRedMagicXiGMCosmoFixedNd.hdf5' test_file = '/scratch/users/swmclau2/xi_gm_cosmo_test2/PearceRedMagicXiGMCosmoFixedNdTest.hdf5'



In [54]:

    
em_method = 'gp'
split_method = 'random'



In [55]:

    
a = 1.0
z = 1.0/a - 1.0



In [56]:

    
bin_idx = 0
fixed_params = {'z':z, 'r': 0.09581734}#, 'cosmo': 0}#, 'r':24.06822623}

hp = np.loadtxt('/home/users/swmclau2/Git/pearce/bin/optimization/sloppy_joes_result_indiv_bins.npy')



In [57]:

    
from glob import glob

hp = np.loadtxt('/home/users/swmclau2/Git/pearce/bin/optimization/sloppy_joes_indiv_bins/sloppy_joes_result_indiv_bin_%2d.npy'%bin_idx)



In [58]:

    
hp = np.array([  8.22518016e+00,  -8.48981351e+00,   8.71510289e+00,  -4.00883505e+00,
  -1.20000000e+01,   6.39814872e+00,   2.41769925e+00,   1.28070602e+00,
  -3.23773108e-01,   8.24276778e+00,   1.20000000e+01,  -7.20251694e+00,
  -1.20000000e+01,  -5.17385710e+00,  -4.80026082e-01,  -8.76781990e-01,
  -3.99855599e+00,   1.10634731e+01,  -5.40163410e+00,   1.20000000e+01,
   9.29994915e+00,  -5.05724758e-01,   1.20000000e+01,  -8.49500340e-03,
   7.79086155e+00])



In [59]:

    
param_names = ['ombh2', 'omch2', 'w0', 'ns', 'ln10As', 'H0', 'Neff', 'logM0', 'sigma_logM', 'logM1', 'alpha']



In [60]:

    
pnames = ['bias', 'amp']
pnames.extend(param_names)
pnames.append('amp')
pnames.extend(param_names)

from collections import defaultdict metric = defaultdict(list) for val, pname in zip(hp, pnames): metric[pname].append(val)



In [61]:

    
from collections import defaultdict
metric = defaultdict(list)

for val, pname in zip(hp, pnames):
    metric[pname].append(val)



In [62]:

    
from time import time
np.random.seed(int(time()))
emu = OriginalRecipe(training_file, method = em_method, fixed_params=fixed_params,
                 custom_mean_function = 'linear', downsample_factor = 0.1, hyperparams = {'metric':metric})









    



/home/users/swmclau2/.local/lib/python2.7/site-packages/pearce/emulator/emu.py:294: UserWarning: WARNING: NaN detected. Skipped 19 points in training data.
  warnings.warn('WARNING: NaN detected. Skipped %d points in training data.' % (num_skipped))

emu.train_metric()



In [ ]:

    
emu.downsample_x.shape









    Out[ ]:





(3998, 11)



In [ ]:

    
pred_y, data_y = emu.goodness_of_fit(test_file, statistic = None)



In [ ]:

    
test_x, test_y, test_cov, _ = emu.get_data(test_file, emu.fixed_params)

t, old_idxs  = emu._whiten(test_x)



In [ ]:

    
params = dict(zip(emu.get_param_names(), test_x[0,:]))

print emu.emulate(params)[0], test_y[0], data_y[0], pred_y[0]

train_x, train_y, train_err, info = emu.get_data(test_file, emu.fixed_params)



In [ ]:

    
mean_func_at_params = emu.mean_function(t)



In [ ]:

    
print np.sqrt(np.mean(np.square((pred_y-data_y)/data_y)))



In [ ]:

    
resmat_flat = 10**pred_y - 10**data_y
datamat_flat = 10**data_y



In [ ]:

    
t_bin = t
acc_bin = np.abs(resmat_flat)/datamat_flat



In [ ]:

    
print np.sqrt(np.mean(np.square(acc_bin)))
print np.mean(acc_bin)



In [ ]:

    
percentiles = np.percentile(acc_bin, range(101))
norm_acc_bin = np.digitize(acc_bin, percentiles)
#norm_acc_bin = 100*((acc_bin - acc_bin.min())/acc_bin.max()).astype(int)



In [ ]:

    
palette = sns.diverging_palette(220, 20, n=len(percentiles)-1, as_cmap=True)
#sns.set_palette(palette)



In [ ]:

    
pnames = emu.get_param_names()

for axes1 in xrange(7,11): for axes2 in xrange(axes1+1, 11): cbar = plt.scatter(t_bin[:,axes1 ], t_bin[:,axes2], c = norm_acc_bin,cmap = palette, alpha = 0.2) plt.colorbar(cbar) plt.xlabel(pnames[axes1]) plt.ylabel(pnames[axes2]) #plt.gray() plt.show()



In [ ]:



In [ ]:



In [ ]: